The team has categorised our features into these 3 broad categories: Sentiment, Technical, General.
Twint can bypass this API, but when the team attempted to use it, we realised that the package has some limitations. Given:
Added:
A paper uses the following 3 below: Link
options(scipen = 99)
library(dplyr)
library(tidyverse)
library(stringr)
library(almanac)
library(lubridate)
library(Quandl)
library(gtrendsR)
library(RcppRoll)
library(xgboost)
library(MLmetrics)
library(pageviews)
library(quantmod)
library("doFuture"); registerDoFuture(); plan(multiprocess)
library(tidyquant)
library(tidymodels)
library(tsfeatures)
library(slider)
library(timetk)
library(data.table)
library(grid)
library(plotly) #for candlestick plot
library(hablar) #for bollinger band analysis
library(skimr)
# Load the daily tweet counts and attach them to the model by date.
twitter_volume <- read.csv("data/twitter_volume.csv")
# Rename by position before touching the date column: the first header can
# arrive as "ï..date" when the file's byte-order mark is misread, and as plain
# "date" otherwise, so referring to it by name is not portable.
colnames(twitter_volume) <- c("date", "tweets_volume")
twitter_volume$date <- as.Date(twitter_volume$date)
bitcoin_model <- bitcoin_model %>% left_join(twitter_volume, by = "date")
## Joining, by = "date"
bitcoin_model$tweets_volume[is.na(bitcoin_model$tweets_volume)] <- 0 # Replace the missing value before dec 2013 with 0
ggplot(bitcoin_model) +
geom_bar(aes(x=date, y=tweets_volume),stat="identity", fill="tan1", colour="sienna3")+
geom_line(aes(x=date, y=close*50),stat="identity") + scale_y_continuous(sec.axis = sec_axis(~./50, name = "bitcoin_price"))
# create Bollinger Bands
bitcoin_price_data <- bitcoin_price
bitcoin_price_data <- bitcoin_price_data %>%
convert(num(open:weighted_price))
bbands <- BBands(bitcoin_price_data[,c("high","low","close")]) #dn - The lower bollinger band, mavg - middle moving average, up - the upper bollinger band
# join and subset data
date_band <- "2011-09-13"
df_1 <- subset(cbind(bitcoin_price, data.frame(bbands[,1:3])), date >= date_band)
row.names(df_1) <- NULL
df_1 <- df_1 %>% mutate(bb_width = (up-dn)/mavg)
df_1 <- df_1 %>% mutate(bb_percent_b = (close-dn)/(up-dn))
bb_df <- df_1 %>% select(c(date,bb_width,bb_percent_b))
bitcoin_model <- bitcoin_model %>% left_join(bb_df)
## Joining, by = "date"
# Restrict the Bollinger-band data to the window shown in the charts.
df_2 <- subset(df_1, date >= "2018-01-01" & date <= "2020-11-30")
# Label each day by whether it closed at or above its open. This is vectorised,
# so it also works when the window is empty (a 1:length(...) loop would then
# iterate over c(1, 0) and fail).
df_2$direction <- ifelse(df_2$close >= df_2$open, "Increasing", "Decreasing")
# Candlestick line colours: green for increasing days, red for decreasing days.
i <- list(line = list(color = '#008000'))
d <- list(line = list(color = '#FF0000'))
# plot candlestick chart
# Candles come from the open/high/low/close columns of df_2; the upper band,
# lower band and moving average are drawn on top as thin lines. The overlays
# use inherit = FALSE (spelled out, because `F` can be reassigned) so that they
# do not pick up the candlestick trace's attributes.
fig2 <- df_2 %>% plot_ly(x = ~date, type = "candlestick",
                         open = ~open, close = ~close,
                         high = ~high, low = ~low, name = "BTC",
                         increasing = i, decreasing = d)
fig2 <- fig2 %>% add_lines(x = ~date, y = ~up, name = "B Bands",
                           line = list(color = '#000080', width = 0.5),
                           legendgroup = "Bollinger Bands",
                           hoverinfo = "none", inherit = FALSE)
# The lower band shares the legend group with the upper band and hides its own
# legend entry.
fig2 <- fig2 %>% add_lines(x = ~date, y = ~dn, name = "B Bands",
                           line = list(color = '#000080', width = 0.5),
                           legendgroup = "Bollinger Bands", inherit = FALSE,
                           showlegend = FALSE, hoverinfo = "none")
fig2 <- fig2 %>% add_lines(x = ~date, y = ~mavg, name = "Mv Avg",
                           line = list(color = '#800000', width = 0.5),
                           hoverinfo = "none", inherit = FALSE)
fig2 <- fig2 %>% layout(yaxis = list(title = "Price"))
# plot volume bar chart
fig3 <- df_2
fig3 <- fig3 %>% plot_ly(x=~date, y=~volume_currency, type='bar', name = "BTC Volume",
color = ~direction, colors = c('#008000','#FF0000'))
fig3 <- fig3 %>% layout(yaxis = list(title = "Volume"))
# create rangeselector buttons
rs <- list(visible = TRUE, x = 0.5, y = -0.055,
xanchor = 'center', yref = 'paper',
font = list(size = 9),
buttons = list(
list(count=1,
label='RESET',
step='all'),
list(count=1,
label='1 YR',
step='year',
stepmode='backward'),
list(count=3,
label='3 MO',
step='month',
stepmode='backward'),
list(count=1,
label='1 MO',
step='month',
stepmode='backward')
))
# subplot with shared x axis
fig2 <- subplot(fig2, fig3, heights = c(0.7,0.2), nrows=2,
shareX = TRUE, titleY = TRUE)
fig2 <- fig2 %>% layout(title = paste("BTC: " , "2018-01-01", " -","2020-11-30"),
xaxis = list(rangeselector = rs),
legend = list(orientation = 'h', x = 0.5, y = 1,
xanchor = 'center', yref = 'paper',
font = list(size = 10),
bgcolor = 'transparent'))
fig2
plot1 <- ggplot(df_2, aes(x = date)) + geom_line(aes(y = close, colour = "Bitcoin Closing Price")) +
scale_x_date(limits = as.Date(c('2018-01-01','2020-11-30')),labels=date_format("%b-%Y")) +
scale_colour_manual(values = c("darkred"))
plot2 <- ggplot(df_2, aes(x = date)) + geom_line(aes(y = bb_width, colour = "BOLL BW")) +
scale_x_date(limits = as.Date(c('2018-01-01','2020-11-30')),labels=date_format("%b-%Y")) +
scale_colour_manual(values = c("steelblue"))
grid.newpage()
grid.draw(rbind(ggplotGrob(plot1), ggplotGrob(plot2), size = "last"))
bitcoin_model_date <- as.data.frame(bitcoin_model$date)
rsi <- as.data.frame(RSI(bitcoin_model$close,14))
rsi <- cbind(bitcoin_model_date,rsi)
colnames(rsi) <- c("date","rsi")
bitcoin_model <- bitcoin_model %>% left_join(rsi)
## Joining, by = "date"
plot3 <- ggplot(bitcoin_model, aes(x = date)) + geom_line(aes(y = close, colour = "Bitcoin Closing Price")) +
scale_x_date(limits = as.Date(c('2018-01-01','2020-11-30')),labels=date_format("%b-%Y")) +
scale_colour_manual(values = c("darkred"))
plot4 <- ggplot(bitcoin_model, aes(x = date)) + geom_line(aes(y = rsi, colour = "RSI")) +
scale_x_date(limits = as.Date(c('2018-01-01','2020-11-30')),labels=date_format("%b-%Y")) +
scale_colour_manual(values = c("steelblue"))
grid.newpage()
grid.draw(rbind(ggplotGrob(plot3), ggplotGrob(plot4), size = "last"))
# another visualisation instead
rsi_vs_close <- ggplot(bitcoin_model, aes(x = date))
rsi_vs_close <- rsi_vs_close + geom_line(aes(y = close, colour = "Bitcoin Closing Price"))
rsi_vs_close <- rsi_vs_close + geom_line(aes(y = rsi*100, colour = "Relative Strength Index"))
rsi_vs_close <- rsi_vs_close + scale_y_continuous(sec.axis = sec_axis(~./100, name = "Title"))
rsi_vs_close <- rsi_vs_close + scale_colour_manual(values = c("darkred", "steelblue"))
rsi_vs_close
We will look at RSI together with the Bollinger Band width: RSI indicates the direction, while bb_width indicates the intensity. For example, when the band width is narrowing, the price can be at either the top or the bottom of the Bollinger band; RSI tells us which. An RSI rising from below 30 suggests that the price is at the bottom of the band, and hence a possible buying opportunity.
# Combined with bb_width
# e.g. RSI going down --> towards oversold band width increases a lot too, may be an indication to buy
# e.g. RSI going up --> bullish, already gaining momentum, bandwidth increasing too, may also be an indication to buy, so depends on the risk appetite of investor when to enter.
plot2 <- ggplot(df_1, aes(x = date)) + geom_line(aes(y = bb_width, colour = "Bollinger Band Width")) +
scale_x_date(limits = as.Date(c('2018-01-01','2020-11-30')),labels=date_format("%b-%Y")) +
scale_colour_manual(values = c("steelblue"))
plot3 <- ggplot(bitcoin_model, aes(x = date)) + geom_line(aes(y = close, colour = "Bitcoin Closing Price")) +
scale_x_date(limits = as.Date(c('2018-01-01','2020-11-30')),labels=date_format("%b-%Y")) +
scale_colour_manual(values = c("darkred"))
plot4 <- ggplot(bitcoin_model, aes(x = date)) + geom_line(aes(y = rsi, colour = "RSI")) +
scale_x_date(limits = as.Date(c('2018-01-01','2020-11-30')),labels=date_format("%b-%Y")) +
scale_colour_manual(values = c("steelblue"))
grid.newpage()
grid.draw(rbind(ggplotGrob(plot3), ggplotGrob(plot4), ggplotGrob(plot2), size = "last"))
# Usually, K = 9 days, S = 12 days and L = 26 days
# Builds a MACD table from a price series.
#   x:     table whose first column is bound in as "date"
#   price: price series, bound in as "closing_price"
#   S, L:  window lengths of the two EMAs whose difference is the MACD line
#   K:     window length of the EMA of the MACD line (the signal line)
# Returns the column-bound result with columns date, closing_price, MACD, signal.
myMACD <- function (x,price,S,L,K){
  macd_line <- EMA(price, S) - EMA(price, L)
  signal_line <- EMA(macd_line, K)
  combined <- cbind(x[, 1], price, macd_line, signal_line)
  colnames(combined) <- c("date", "closing_price", "MACD", "signal")
  combined
}
macd <- myMACD(bitcoin_price,Cl(bitcoin_price), 12, 26,9)
tail(macd,n=5)
## date closing_price MACD signal
## 3367 2020-11-30 19700.19 1023.825 1097.046
## 3368 2020-12-01 18771.43 1004.070 1078.451
## 3369 2020-12-02 19226.55 1013.456 1065.452
## 3370 2020-12-03 19448.40 1026.958 1057.753
## 3371 2020-12-04 19437.67 1024.977 1051.198
macd <- macd %>% mutate(macd_signal_dist = MACD-signal)
macd_df <- macd %>% select(c(date,macd_signal_dist))
bitcoin_model <- bitcoin_model %>% left_join(macd_df)
## Joining, by = "date"
# Histogram colour: "Increasing" where the MACD line is above its signal line,
# "Decreasing" otherwise.
macd$direction <- ifelse(macd$macd_signal_dist > 0, "Increasing", "Decreasing")
# MACD line, dashed signal line and their difference as bars, zoomed to the
# first half of 2020. The legend label states the (12, 26, 9) settings that
# `macd` was built with.
macd_chart <- ggplot(macd, aes(x = date)) +
  geom_line(aes(y = MACD, colour = "Moving Average Convergence Divergence (12,26,9)")) +
  geom_line(aes(y = signal, colour = "Signal"), linetype = "dashed") +
  geom_bar(aes(y = macd_signal_dist, fill = direction), stat = "identity") +
  scale_fill_manual(values = c("Increasing" = "#008000", "Decreasing" = "#FF0000")) +
  scale_colour_manual(values = c("darkred", "steelblue")) +
  scale_x_date(limits = as.Date(c('2020-01-01', '2020-06-30')))
macd_chart
plot5<- ggplot(macd, aes(x = date)) + geom_line(aes(y = closing_price, colour = "Bitcoin Closing Price")) +
scale_x_date(limits = as.Date(c('2018-01-01','2020-11-30')),labels=date_format("%b-%Y")) +
scale_colour_manual(values = c("steelblue"))
grid.newpage()
grid.draw(rbind(ggplotGrob(plot5), ggplotGrob(macd_chart), size = "last"))
acu_dis_df <- bitcoin_price
acu_dis_df <- acu_dis_df %>% mutate(cmfv = ((bitcoin_price$close-bitcoin_price$low)-(bitcoin_price$high-bitcoin_price$close))/(bitcoin_price$high-bitcoin_price$low)*bitcoin_price$volume_currency)
acu_dis_df$cmfv[is.na(acu_dis_df$cmfv)] <- 0
acu_dis_df <- acu_dis_df %>% mutate(cumulative_cmfv = cumsum(cmfv))
acu_dis_df_add <- acu_dis_df %>% select(date,cumulative_cmfv)
bitcoin_model <- bitcoin_model %>% left_join(acu_dis_df_add)
## Joining, by = "date"
ggplot(acu_dis_df) +
geom_line(aes(x=date, y=cumulative_cmfv),stat="identity", colour="sienna3")+
geom_line(aes(x=date, y=close*1000000),stat="identity") + scale_y_continuous(sec.axis = sec_axis(~./1000000, name = "bitcoin_price")) +
scale_x_date(limits = as.Date(c('2018-01-01','2020-11-30')),labels=date_format("%b-%Y"))
EMA: Compared with the SMA, the EMA places more weight on recent price levels than on older ones.
# 12-day, 24-day and 72-day EMAs of the closing price
bitcoin_date <- as.data.frame(bitcoin_price$date)
colnames(bitcoin_date) <- c("date")
range_ema <- c(12, 24, 72)
# Pass each window length itself as `n`. Iterating over seq_along() and passing
# the index would compute 1-, 2- and 3-period EMAs under the 12/24/72 names
# (a 1-period EMA is simply the closing price).
bitcoin_ema <- lapply(range_ema, function(n) EMA(Cl(bitcoin_price), n = n))
bitcoin_ema <- as.data.frame(do.call(cbind, bitcoin_ema))
colnames(bitcoin_ema) <- paste0("ema_", range_ema)
ema <- bitcoin_date %>% cbind(bitcoin_ema)
bitcoin_model <- bitcoin_model %>% left_join(ema, by = "date")
## Joining, by = "date"
# 12-day, 24-day and 72-day SMAs of the closing price
range_sma <- c(12, 24, 72)
# Pass each window length itself as `n`; passing the loop index would compute
# 1-, 2- and 3-period averages under the 12/24/72 names.
bitcoin_sma <- lapply(range_sma, function(n) SMA(Cl(bitcoin_price), n = n))
bitcoin_sma <- as.data.frame(do.call(cbind, bitcoin_sma))
colnames(bitcoin_sma) <- paste0("sma_", range_sma)
# bitcoin_date is the one-column date table built alongside the EMA features.
sma <- bitcoin_date %>% cbind(bitcoin_sma)
bitcoin_model <- bitcoin_model %>% left_join(sma, by = "date")
## Joining, by = "date"
# Assumptions:
# short term: 14,28
# longer term: 90,180,200
range_roc <- c(14, 28, 90, 180, 200)
# Pass each look-back length itself as `n`; passing the loop index would compute
# 1- to 5-period rates of change under the 14/28/90/180/200 names.
bitcoin_roc <- lapply(range_roc, function(n) ROC(Cl(bitcoin_price), n = n))
bitcoin_roc <- as.data.frame(do.call(cbind, bitcoin_roc))
colnames(bitcoin_roc) <- paste0("roc_", range_roc)
bitcoin_roc <- bitcoin_date %>% cbind(bitcoin_roc)
bitcoin_model <- bitcoin_model %>% left_join(bitcoin_roc, by = "date")
## Joining, by = "date"
The ROC indicator signals positive momentum when it crosses above the zero line from below, and negative momentum in the price when it crosses below the zero line from above.
plota <- ggplot(bitcoin_model, aes(x = date)) + geom_line(aes(y = roc_14, colour = "ROC_14days")) +
scale_x_date(limits = as.Date(c('2020-01-01','2020-06-30')),labels=date_format("%b-%Y")) +
scale_colour_manual(values = c("steelblue"))
plotb <- ggplot(bitcoin_model, aes(x = date)) + geom_line(aes(y = roc_90, colour = "ROC_90days")) +
scale_x_date(limits = as.Date(c('2020-01-01','2020-06-30')),labels=date_format("%b-%Y")) +
scale_colour_manual(values = c("darkred"))
plotc <- ggplot(bitcoin_model, aes(x = date)) + geom_line(aes(y = roc_200, colour = "ROC_200days")) +
scale_x_date(limits = as.Date(c('2020-01-01','2020-06-30')),labels=date_format("%b-%Y")) +
scale_colour_manual(values = c("green"))
grid.newpage()
grid.draw(rbind(ggplotGrob(plota), ggplotGrob(plotb), ggplotGrob(plotc), size = "last"))
bitcoin_obv <- as.data.frame(OBV(bitcoin_price$close, bitcoin_price$volume_currency))
bitcoin_obv <- bitcoin_date %>% cbind(bitcoin_obv)
colnames(bitcoin_obv) <- c("date","on_balance_vol")
bitcoin_model <- bitcoin_model %>% left_join(bitcoin_obv)
## Joining, by = "date"
# Data about bitcoin activity, transaction fees, and mining.
print("Downloading bitcoin indicators.")
## [1] "Downloading bitcoin indicators."
code_list <- list(c("BCHAIN/TOTBC", "Total Bitcoins"),
c("BCHAIN/MKTCP", "Bitcoin Market Capitalization"),
c("BCHAIN/NADDU", "Bitcoin Number of Unique Addresses Used"),
c("BCHAIN/ETRAV", "Bitcoin Estimated Transaction Volume BTC"),
c("BCHAIN/ETRVU", "Bitcoin Estimated Transaction Volume USD"),
c("BCHAIN/TRVOU", "Bitcoin USD Exchange Trade Volume"),
c("BCHAIN/NTRAN", "Bitcoin Number of Transactions"),
c("BCHAIN/NTRAT", "Bitcoin Total Number of Transactions"),
c("BCHAIN/NTREP", "Bitcoin Number of Transactions Excluding Popular Addresses"),
c("BCHAIN/NTRBL", "Bitcoin Number of Tansaction per Block"),
c("BCHAIN/ATRCT", "Bitcoin Median Transaction Confirmation Time"),
c("BCHAIN/TRFEE", "Bitcoin Total Transaction Fees"),
c("BCHAIN/TRFUS", "Bitcoin Total Transaction Fees USD"),
c("BCHAIN/CPTRA", "Bitcoin Cost Per Transaction"),
c("BCHAIN/CPTRV", "Bitcoin Cost % of Transaction Volume"),
c("BCHAIN/BLCHS", "Bitcoin api.blockchain Size"),
c("BCHAIN/AVBLS", "Bitcoin Average Block Size"),
c("BCHAIN/TOUTV", "Bitcoin Total Output Volume"),
c("BCHAIN/HRATE", "Bitcoin Hash Rate"),
c("BCHAIN/MIREV", "Bitcoin Miners Revenue"),
c("BCHAIN/BCDDE", "Bitcoin Days Destroyed"),
c("BCHAIN/BCDDW", "Bitcoin Days Destroyed Minimum Age 1 Week"),
c("BCHAIN/BCDDM", "Bitcoin Days Destroyed Minimum Age 1 Month"),
c("BCHAIN/BCDDY", "Bitcoin Days Destroyed Minimum Age 1 Year") ,
c("BCHAIN/BCDDC", "Bitcoin Days Destroyed Cumulative"))
# Download every series listed in code_list, one after another, and stack the
# results into a single table. Each entry is c(code, description); both are
# handed to quandl_tidy().
# bitcoin_data <- foreach(i=seq_along(code_list), .combine='bind_rows') %dopar% {quandl_tidy(code_list[[i]][1], code_list[[i]][2])}
bitcoin_data <- bind_rows(
  tibble(),
  lapply(code_list, function(entry) {
    print(str_c("Downloading data for ", entry[1], "."))
    quandl_tidy(entry[1], entry[2])
  })
)
## [1] "Downloading data for BCHAIN/TOTBC."
## [1] "Downloading data for BCHAIN/MKTCP."
## [1] "Downloading data for BCHAIN/NADDU."
## [1] "Downloading data for BCHAIN/ETRAV."
## [1] "Downloading data for BCHAIN/ETRVU."
## [1] "Downloading data for BCHAIN/TRVOU."
## [1] "Downloading data for BCHAIN/NTRAN."
## [1] "Downloading data for BCHAIN/NTRAT."
## [1] "Downloading data for BCHAIN/NTREP."
## [1] "Downloading data for BCHAIN/NTRBL."
## [1] "Downloading data for BCHAIN/ATRCT."
## [1] "Downloading data for BCHAIN/TRFEE."
## [1] "Downloading data for BCHAIN/TRFUS."
## [1] "Downloading data for BCHAIN/CPTRA."
## [1] "Downloading data for BCHAIN/CPTRV."
## [1] "Downloading data for BCHAIN/BLCHS."
## [1] "Downloading data for BCHAIN/AVBLS."
## [1] "Downloading data for BCHAIN/TOUTV."
## [1] "Downloading data for BCHAIN/HRATE."
## [1] "Downloading data for BCHAIN/MIREV."
## [1] "Downloading data for BCHAIN/BCDDE."
## [1] "Downloading data for BCHAIN/BCDDW."
## [1] "Downloading data for BCHAIN/BCDDM."
## [1] "Downloading data for BCHAIN/BCDDY."
## [1] "Downloading data for BCHAIN/BCDDC."
# Clean Bitcoin Indicators
bitcoin_data <- bitcoin_data %>%
select(date, value, code) %>%
spread(code, value)
colnames(bitcoin_data) <- make.names(colnames(bitcoin_data))
names(bitcoin_data) <- tolower(names(bitcoin_data))
# NVT
bitcoin_data$nvt <- bitcoin_data$bchain.mktcp/bitcoin_data$bchain.etrvu
bitcoin_data_nvt <- select(bitcoin_data,c(date,nvt))
bitcoin_model <- bitcoin_model %>% left_join(bitcoin_data_nvt)
## Joining, by = "date"
atr <- ATR(bitcoin_price[,c("high","low","close")], n=14) #14-days moving average
atr <- bitcoin_date %>% cbind(atr)
atr <- select(atr,c(date,atr))
bitcoin_model <- bitcoin_model %>% left_join(atr)
## Joining, by = "date"
# Scrape Google Data
# Month-start dates to download: starting in 2011 when download_all is TRUE,
# otherwise starting in 2017. Months that begin after today are dropped.
download_all <- FALSE
first_month <- if (download_all == TRUE) ymd("2011-01-01") else ymd("2017-01-01")
dates <- tibble(dates = first_month + months(0:120)) %>%
  filter(dates <= Sys.Date())
# Download the Google Trends interest-over-time series for one search term.
#   query:      keyword passed to gtrends()
#   begin_date: start of the range
#   end_date:   end of the range; the two dates are joined with a space to form
#               the `time` argument
# Returns a tibble with a Date column `date` and the `hits` column.
google_trends <- function(query, begin_date, end_date) {
  # Use the caller's keyword rather than a hard-coded 'bitcoin'.
  trends <- gtrends(keyword = query,
                    time = str_c(begin_date, ' ', end_date))
  trends[['interest_over_time']] %>%
    select(date, hits) %>%
    mutate(date = as.Date(date)) %>%
    as_tibble()
}
# Download Google Trends Daily Data
# One request and one CSV file per calendar month listed in `dates`.
for (i in seq_len(nrow(dates))) {
  begin_date <- as.Date(dates[["dates"]][i])
  # Last day of that month, capped at today. min() keeps the Date class, whereas
  # ifelse() returns a bare number that as.Date() cannot convert without an
  # origin on older R versions.
  end_date <- min(begin_date + months(1) - days(1), Sys.Date())
  print(str_c("Downloading Google Trends data from ", begin_date, " to ", end_date, "."))
  df <- google_trends("bitcoin", begin_date, end_date)
  # Write into the project-relative "google-trend-daily" directory, the same
  # directory the rebasing step lists and reads, instead of a user-specific
  # absolute path.
  write_csv(df, file.path("google-trend-daily",
                          str_c("google-trends-daily-", begin_date, "-", end_date, ".csv")))
}
# Download Google Trends Monthly Data
# A single request covering 2011-01-01 to today; its hits column is renamed to
# hits_monthly.
monthly <- google_trends("bitcoin", "2011-01-01", Sys.Date()) %>%
rename(hits_monthly = hits)
# Entries recorded as the string "<1" are replaced with "1" so that the later
# conversion to numeric does not turn them into NA.
monthly$hits_monthly[monthly$hits_monthly=="<1"]<- "1"
# Rebase
bitcoin_google <- list.files('google-trend-daily') %>%
map_df(~ read_csv(file.path('google-trend-daily', str_c(.)), col_types = c('Di'))) %>%
rename(hits_daily = hits) %>%
left_join(monthly) %>%
fill(hits_monthly) %>%
mutate(hits_monthly = as.numeric(hits_monthly),
hits_daily = hits_daily * hits_monthly / 100)
The 3 chunks above show how the Google Trends data were obtained. Since we have already run them previously, we will now just load the data that we saved as a CSV and add it to the model below.
bitcoin_google <- read.csv("data/bitcoin_google.csv")
bitcoin_google$date <- as.Date(bitcoin_google$date)
# add to model
bitcoin_model <- bitcoin_model %>%
left_join(bitcoin_google)
## Joining, by = "date"
bitcoin_data_df <- select(bitcoin_data, -c(nvt))
bitcoin_model <- bitcoin_model %>% left_join(bitcoin_data_df)
## Joining, by = "date"
bitcoin_model <- bitcoin_model %>%
mutate(close_drawdown = -1 * (1 - close / cummax(close)))
# Download the daily price history of one Yahoo Finance ticker, from 1900-01-01.
#   ticker: Yahoo symbol, e.g. '^VIX'
# Returns a tibble with columns open, high, low, close, volume, adjusted_close,
# date and ticker.
# NOTE(review): the renaming assumes getSymbols() returns exactly six price
# columns in open/high/low/close/volume/adjusted order -- confirm.
get_yahoo <- function(ticker) {
  prices <- getSymbols(ticker, src = 'yahoo', auto.assign = FALSE, from = '1900-01-01')
  # The ticker column is added here so that the eight names assigned below line
  # up with eight columns (price columns + date alone would be one short).
  df <- prices %>%
    as_tibble() %>%
    mutate(date = index(prices), ticker = ticker)
  colnames(df) <- c("open", "high", "low", "close", "volume", "adjusted_close", "date", "ticker")
  df
}
vix <- get_yahoo('^VIX') %>%
select(date, vix = adjusted_close)
## 'getSymbols' currently uses auto.assign=TRUE by default, but will
## use auto.assign=FALSE in 0.5-0. You will still be able to use
## 'loadSymbols' to automatically load data. getOption("getSymbols.env")
## and getOption("getSymbols.auto.assign") will still be checked for
## alternate defaults.
##
## This message is shown once per session and may be disabled by setting
## options("getSymbols.warning4.0"=FALSE). See ?getSymbols for details.
bitcoin_model <- bitcoin_model %>% left_join(vix)
## Joining, by = "date"
bitcoin_model <- bitcoin_model %>% fill(vix, .direction = "down")
# load the csv first
fedfundsrate <- read.csv("data/fedfundsrate.csv")
fedfundsrate$DATE <- as.Date(fedfundsrate$DATE)
fedfundsrate <- subset(fedfundsrate, DATE>=as.Date("2011-09-13"))
colnames(fedfundsrate) <- c("date", "effr")
fedfundsrate$effr[fedfundsrate$effr=="."] <- NA
fedfundsrate <- fedfundsrate %>% map_df(na.locf)
fedfundsrate$effr <- as.numeric(fedfundsrate$effr)
bitcoin_model <- bitcoin_model %>% left_join(fedfundsrate)
## Joining, by = "date"
bitcoin_model <- bitcoin_model %>% fill(effr,.direction = "down")
sp500 <- get_yahoo('^GSPC') %>%
select(date, sp500 = adjusted_close)
bitcoin_model <- bitcoin_model %>% left_join(sp500)
## Joining, by = "date"
# fill na downwards for weekend missing values - fill with fri closing price
bitcoin_model <- bitcoin_model %>% fill(sp500, .direction = "down")
# load the csv first
xau_usd_historical <- read.csv("data/xau_usd_historical.csv")
# Address the date column by position: its header can arrive as "ï..Date" when
# the file's byte-order mark is misread, and as plain "Date" otherwise.
xau_usd_historical[[1]] <- as.Date(xau_usd_historical[[1]], format = "%B %d, %Y")
xau_usd_df <- select(xau_usd_historical, 1, Price)
colnames(xau_usd_df) <- c("date", "gold_price")
bitcoin_model <- bitcoin_model %>% left_join(xau_usd_df, by = "date")
## Joining, by = "date"
bitcoin_model <- bitcoin_model %>% fill(gold_price,.direction = "down")
# Bitcoin close against the gold price, with gold multiplied by 10 on the
# primary axis and mapped back by the secondary axis. Every layer, including
# the date scale, is chained with `+`; without the `+` before scale_x_date()
# the scale is evaluated as a separate statement and never applied to the plot.
ggplot(bitcoin_model) +
  geom_line(aes(x = date, y = close), stat = "identity", colour = "sienna3") +
  geom_line(aes(x = date, y = gold_price * 10), stat = "identity") +
  scale_y_continuous(sec.axis = sec_axis(~ . / 10, name = "gold_price")) +
  scale_x_date(limits = as.Date(c('2018-01-01', '2020-11-30')), labels = date_format("%b-%Y"))
## <ScaleContinuousDate>
## Range:
## Limits: 1.75e+04 -- 1.86e+04
The OFR Financial Stress Index (OFR FSI) is a daily market-based snapshot of stress in global financial markets. It is constructed from 33 financial market variables, such as yield spreads, valuation measures, and interest rates. The OFR FSI is positive when stress levels are above average, and negative when stress levels are below average.
The OFR FSI incorporates five categories of indicators: credit, equity valuation, funding, safe assets and volatility. The FSI shows stress contributions by three regions: United States, other advanced economies, and emerging markets. The value of the OFR FSI on a given day is the weighted average level of each variable observed in the market on that day, relative to its history. The index is zero when this average is zero, suggesting that stress is at normal levels. The index is calculated after each U.S. trading day.
# load the csv first
fsi<- read.csv("data/fsi.csv")
fsi_df <- fsi %>% select(date="Date", ofr_fsi = "OFR.FSI")
fsi_df$date <- as.Date(fsi_df$date)
bitcoin_model <- bitcoin_model %>% left_join(fsi_df)
## Joining, by = "date"
#weekend missing, fill by fri's numbers
bitcoin_model <- bitcoin_model %>% fill(ofr_fsi,.direction = "down")
The AAII Investor Sentiment Survey measures the percentage of individual investors who are bullish, bearish, and neutral on the stock market for the next six months; individuals are polled from the ranks of the AAII membership on a weekly basis. Only one vote per member is accepted in each weekly voting period.
Since this AAII sentiment (comprises bullish, neutral and bearish sentiment) is for stock market, we take the assumption that bitcoin is seen as a “safe haven” compared to stock, so we will take the bearish sentiment.
This is a weekly number, so we will fill every day of a week with that week’s number.
# Authenticate with Quandl, then download the AAII sentiment survey and keep the
# survey date and the bearish share.
# The API key is read from the QUANDL_API_KEY environment variable so that the
# credential is not stored in the source file.
quandl_key <- Sys.getenv("QUANDL_API_KEY")
if (!nzchar(quandl_key)) {
  stop("Set the QUANDL_API_KEY environment variable before running this chunk.",
       call. = FALSE)
}
Quandl.api_key(quandl_key)
aaii_sentiment <- Quandl("AAII/AAII_SENTIMENT") %>%
  arrange(Date) %>%
  as_tibble()
aaii_sentiment_df <- aaii_sentiment %>%
  select(date = "Date", aaii_bearish_sentiment = "Bearish")
bitcoin_model <- bitcoin_model %>% left_join(aaii_sentiment_df)
## Joining, by = "date"
# Fill days without a survey value from the next available row below.
# NOTE(review): every other series in this file is filled "down". If rows are in
# ascending date order, "up" gives each day the value of the following survey,
# i.e. a figure not yet published on that day -- confirm this is intended.
bitcoin_model <- bitcoin_model %>% fill(aaii_bearish_sentiment,.direction = "up")
cny_usd <- get_yahoo('USDCNY=X') %>%
select(date, cny_usd_close = adjusted_close)
bitcoin_model <- bitcoin_model %>% left_join(cny_usd)
## Joining, by = "date"
# fill na downwards for weekend missing values - fill with fri closing price
bitcoin_model <- bitcoin_model %>% fill(cny_usd_close, .direction = "down")
# load in the data first
us_dollar_index_historical_data <- read.csv("data/us_dollar_index_historical_data.csv")
# Address the date column by position: its header can arrive as "ï..Date" when
# the file's byte-order mark is misread, and as plain "Date" otherwise.
us_dollar_index_historical_data[[1]] <- as.Date(us_dollar_index_historical_data[[1]],
                                                format = "%B %d, %Y")
us_dollar_index <- select(us_dollar_index_historical_data, 1, Price)
colnames(us_dollar_index) <- c("date", "dollar_index_close")
bitcoin_model <- bitcoin_model %>% left_join(us_dollar_index, by = "date")
## Joining, by = "date"
bitcoin_model <- bitcoin_model %>% fill(dollar_index_close, .direction = "down")
Another way to get a comparison of how any particular crypto is moving relative to the crypto market is to build an index to compare it to. An index will also be able to show trends and changes in investing patterns.
We have created a simple crypto index based on weighted market cap of top 10 crypto. (Market capitalization-weighted). Using an index, an investor can see a mood of the market and can make informed decisions.
Firstly, Asset selection:
Secondly, Allocation Distribution:
# Load the price and market-cap history of the top-10 cryptocurrencies.
top10_crypto <- read.csv("data/top10_crypto.csv")
# Rename by position before parsing the date: the first header can arrive as
# "ï..date" when the file's byte-order mark is misread.
colnames(top10_crypto) <- c("date", "open", "high", "low", "close", "volume", "market_cap", "symbol")
top10_crypto$date <- as.Date(top10_crypto$date, format = "%B %d, %Y")
# Per day, sum the square roots of the market caps; this total is the
# denominator of each coin's weight and the raw level of the index.
mkt_cap_total <- top10_crypto %>%
  group_by(date) %>%
  summarise(sqrt_sum_mkt_cap = sum(sqrt(market_cap)))
## `summarise()` ungrouping output (override with `.groups` argument)
top10_crypto_df <- top10_crypto %>% left_join(mkt_cap_total)
## Joining, by = "date"
top10_crypto_df <- top10_crypto_df %>% mutate(weightage = sqrt(market_cap)/sqrt_sum_mkt_cap)
top10_crypto_df <- top10_crypto_df %>% group_by(date)
# when take sqrt (the weightage of each crypto in the top10 mix will be more acceptable)
# e.g. 2018-01-01: BTC - 28.4%, ETH - 16.2%, XRP - 18.1%, USDT - 2.2%, LTC - 6.6%, BCH - 12.0%, LINK - 0.9%, ADA - 8.1%, BNB - 1.7%, XLM - 5.5%. Compared to if merely based on market cap, bitcoin would have a proportion of 70%.
#lets assume the base of this index is 100, so the
divisor = subset(mkt_cap_total, date == "2018-01-01")$sqrt_sum_mkt_cap/100
mkt_cap_total <- mkt_cap_total %>% mutate(crypto_index = sqrt_sum_mkt_cap/divisor)
crypto_index <- mkt_cap_total %>% tq_mutate(select = crypto_index,
mutate_fun = periodReturn,
period = 'daily',
type = 'log',
col_rename = 'crypto_index_return')
# we will add the returns of index and index into the model
crypto_index_df <- select(crypto_index, c("date","crypto_index", "crypto_index_return"))
bitcoin_model <- bitcoin_model %>% left_join(crypto_index_df)
## Joining, by = "date"
bitcoin_model <- subset(bitcoin_model, date>= "2018-01-01" & date <= "2020-11-30")
bitcoin_features <- select(bitcoin_model, -c(close,future_return,future_return_sign))
ggplot(bitcoin_model, aes(x = date)) +
geom_line(aes(y = crypto_index), colour = "red", alpha = 0.8)
ggplot(bitcoin_model, aes(x = date)) +
geom_line(aes(y = future_return), colour = "blue", alpha = 0.8) +
geom_line(aes(y = crypto_index_return), colour = "red", alpha = 0.6)
The following indicators do not have values since 2017, and there are no alternative sites that provide these data, so we will leave these out:
This feature has an alternative site that we can take the data from:
# Drop the indicators without recent values, then bring the median confirmation
# time back in from the alternative CSV source.
# NOTE(review): this selection starts again from bitcoin_model, so the earlier
# bitcoin_features assignment that removed close, future_return and
# future_return_sign is overwritten and those columns remain in the result --
# confirm that is intended.
bitcoin_features <- select(bitcoin_model, -c(bchain.bcddc,bchain.bcdde,bchain.bcddm,bchain.bcddw,bchain.bcddy,bchain.atrct))
# bchain.atrct - Bitcoin Median Transaction Confirmation Time
# load in the data first
median_confirmation_time_bitcoin <- read.csv("data/median-confirmation-time-bitcoin.csv")
# Rename by position before parsing the date: the first header can arrive as
# "ï..date" when the file's byte-order mark is misread.
colnames(median_confirmation_time_bitcoin) <- c("date", "bchain.atrct")
median_confirmation_time_bitcoin$date <- as.Date(median_confirmation_time_bitcoin$date,
                                                 format = "%d/%m/%Y")
bitcoin_features <- bitcoin_features %>%
  left_join(median_confirmation_time_bitcoin, by = "date")
## Joining, by = "date"
This feature is missing one value, on 23 May 2020.
## number obtained from blockchain website: 588174
bitcoin_features[bitcoin_features$date=="2020-05-23", "bchain.naddu"] <- 588174
# hits daily and monthly 30 Nov 2020 missing, currently fill with previous day
bitcoin_features <- bitcoin_features %>% fill(hits_daily,.direction = "down")
bitcoin_features <- bitcoin_features %>% fill(hits_monthly,.direction = "down")
Confirm that there are no missing values before preparing the data for the models.
rmarkdown::paged_table(bitcoin_features)
skim(bitcoin_features)
| Name | bitcoin_features |
| Number of rows | 1065 |
| Number of columns | 57 |
| _______________________ | |
| Column type frequency: | |
| Date | 1 |
| factor | 1 |
| numeric | 55 |
| ________________________ | |
| Group variables | None |
Variable type: Date
| skim_variable | n_missing | complete_rate | min | max | median | n_unique |
|---|---|---|---|---|---|---|
| date | 0 | 1 | 2018-01-01 | 2020-11-30 | 2019-06-17 | 1065 |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| future_return_sign | 0 | 1 | FALSE | 2 | 1: 558, 0: 507 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| close | 0 | 1 | 8281.74 | 2870.47 | 3179.54 | 6456.77 | 8208.69 | 9902.00 | 19700.19 | ▅▇▃▁▁ |
| future_return | 0 | 1 | 0.00 | 0.04 | -0.39 | -0.01 | 0.00 | 0.02 | 0.18 | ▁▁▁▇▁ |
| tweets_volume | 0 | 1 | 96273.55 | 47910.80 | 44292.00 | 65777.00 | 80060.00 | 110723.00 | 371290.00 | ▇▂▁▁▁ |
| bb_width | 0 | 1 | 0.24 | 0.15 | 0.03 | 0.12 | 0.21 | 0.32 | 0.88 | ▇▆▂▁▁ |
| bb_percent_b | 0 | 1 | 0.53 | 0.37 | -0.75 | 0.26 | 0.53 | 0.82 | 1.79 | ▁▅▇▅▁ |
| rsi | 0 | 1 | 51.23 | 13.74 | 9.63 | 42.19 | 50.09 | 59.61 | 88.71 | ▁▃▇▃▁ |
| macd_signal_dist | 0 | 1 | 1.23 | 118.18 | -538.72 | -57.77 | 6.94 | 61.00 | 472.25 | ▁▂▇▃▁ |
| cumulative_cmfv | 0 | 1 | 7512870167.75 | 1723437413.02 | 4846877102.15 | 6023132825.36 | 7010265118.99 | 8550339719.48 | 12723602645.28 | ▇▂▆▂▁ |
| ema_12 | 0 | 1 | 8281.74 | 2870.47 | 3179.54 | 6456.77 | 8208.69 | 9902.00 | 19700.19 | ▅▇▃▁▁ |
| ema_24 | 0 | 1 | 8279.18 | 2854.91 | 3195.18 | 6457.76 | 8213.66 | 9877.74 | 19132.48 | ▃▇▅▁▁ |
| ema_72 | 0 | 1 | 8276.99 | 2842.63 | 3208.65 | 6454.71 | 8233.00 | 9919.76 | 18810.45 | ▃▇▅▁▁ |
| sma_12 | 0 | 1 | 8281.74 | 2870.47 | 3179.54 | 6456.77 | 8208.69 | 9902.00 | 19700.19 | ▅▇▃▁▁ |
| sma_24 | 0 | 1 | 8279.01 | 2856.62 | 3186.66 | 6446.83 | 8210.40 | 9900.68 | 18946.64 | ▃▇▅▁▁ |
| sma_72 | 0 | 1 | 8276.36 | 2845.42 | 3189.69 | 6450.57 | 8212.51 | 9901.66 | 18758.88 | ▃▇▅▁▁ |
| roc_14 | 0 | 1 | 0.00 | 0.04 | -0.49 | -0.01 | 0.00 | 0.02 | 0.17 | ▁▁▁▇▁ |
| roc_28 | 0 | 1 | 0.00 | 0.06 | -0.49 | -0.02 | 0.00 | 0.03 | 0.22 | ▁▁▁▇▁ |
| roc_90 | 0 | 1 | 0.00 | 0.07 | -0.49 | -0.03 | 0.00 | 0.03 | 0.25 | ▁▁▂▇▁ |
| roc_180 | 0 | 1 | 0.00 | 0.08 | -0.51 | -0.04 | 0.00 | 0.04 | 0.26 | ▁▁▂▇▁ |
| roc_200 | 0 | 1 | 0.00 | 0.09 | -0.61 | -0.04 | 0.01 | 0.05 | 0.27 | ▁▁▂▇▁ |
| on_balance_vol | 0 | 1 | -1096409073.75 | 1101578392.07 | -2940622853.82 | -2044454043.25 | -1056244942.69 | -339411405.58 | 2581654689.52 | ▇▇▇▂▁ |
| nvt | 0 | 1 | 153.65 | 74.52 | 27.65 | 106.15 | 132.72 | 177.71 | 470.50 | ▆▇▂▁▁ |
| atr | 0 | 1 | 480.16 | 320.36 | 86.93 | 301.27 | 405.51 | 538.06 | 1888.64 | ▇▅▁▁▁ |
| hits_daily | 0 | 1 | 10.29 | 4.33 | 4.56 | 7.60 | 9.28 | 11.48 | 53.00 | ▇▁▁▁▁ |
| hits_monthly | 0 | 1 | 16.06 | 8.28 | 8.00 | 12.00 | 14.00 | 18.00 | 53.00 | ▇▂▁▁▁ |
| bchain.avbls | 0 | 1 | 1.04 | 0.20 | 0.43 | 0.92 | 1.06 | 1.21 | 1.42 | ▁▂▆▇▅ |
| bchain.blchs | 0 | 1 | 226415.94 | 47234.65 | 149113.62 | 183647.83 | 224988.07 | 266481.87 | 312990.88 | ▇▇▆▇▆ |
| bchain.cptra | 0 | 1 | 51.95 | 24.10 | 18.00 | 33.93 | 48.73 | 61.94 | 146.40 | ▇▇▂▁▁ |
| bchain.cptrv | 0 | 1 | 1.51 | 0.83 | 0.28 | 0.95 | 1.36 | 1.80 | 5.72 | ▇▆▂▁▁ |
| bchain.etrav | 0 | 1 | 141157.56 | 65479.74 | 37558.21 | 99809.22 | 133866.30 | 168755.87 | 629491.33 | ▇▃▁▁▁ |
| bchain.etrvu | 0 | 1 | 1180597105.11 | 735311420.01 | 223512708.83 | 664911253.63 | 991675387.16 | 1528500615.48 | 5164208947.92 | ▇▃▁▁▁ |
| bchain.hrate | 0 | 1 | 72231222.53 | 37344448.32 | 13727615.88 | 41672924.13 | 58125055.93 | 107494237.74 | 162263115.42 | ▇▇▅▆▂ |
| bchain.mirev | 0 | 1 | 14090625.39 | 5644452.40 | 4750978.91 | 10379765.62 | 13411241.26 | 16961771.12 | 46900388.18 | ▇▇▁▁▁ |
| bchain.mktcp | 0 | 1 | 147617221755.42 | 52610262701.00 | 56200958927.90 | 112434867969.00 | 144998538657.00 | 178943347659.00 | 357155729656.00 | ▆▇▅▁▁ |
| bchain.naddu | 0 | 1 | 534927.78 | 104397.24 | 310119.00 | 459478.00 | 526880.00 | 598670.00 | 1054711.00 | ▃▇▃▁▁ |
| bchain.ntran | 0 | 1 | 285477.61 | 59811.49 | 135129.00 | 240652.00 | 295589.00 | 330103.00 | 452646.00 | ▂▅▇▆▁ |
| bchain.ntrat | 0 | 1 | 428964559.51 | 91197055.55 | 287815664.00 | 343716805.00 | 424766007.00 | 510432934.00 | 591325128.00 | ▇▆▅▆▆ |
| bchain.ntrbl | 0 | 1 | 1948.22 | 426.53 | 834.13 | 1647.23 | 2057.52 | 2280.87 | 2762.54 | ▂▃▅▇▃ |
| bchain.ntrep | 0 | 1 | 275379.37 | 57782.80 | 129732.00 | 231918.00 | 286027.00 | 320995.00 | 437027.00 | ▂▅▇▆▁ |
| bchain.totbc | 0 | 1 | 17748971.35 | 540864.46 | 16774500.00 | 17284587.50 | 17762812.50 | 18260750.00 | 18557900.00 | ▅▅▅▅▇ |
| bchain.toutv | 0 | 1 | 1330607.32 | 1049236.99 | 421940.21 | 864866.80 | 1097641.32 | 1518114.01 | 24528670.35 | ▇▁▁▁▁ |
| bchain.trfee | 0 | 1 | 64.99 | 94.55 | 9.97 | 22.56 | 33.39 | 72.11 | 778.26 | ▇▁▁▁▁ |
| bchain.trfus | 0 | 1 | 664590.94 | 1343382.55 | 42133.15 | 146878.04 | 269893.77 | 664514.69 | 12045172.46 | ▇▁▁▁▁ |
| bchain.trvou | 0 | 1 | 317567460.47 | 338531702.16 | 25368503.34 | 113743420.03 | 203978693.83 | 382232905.93 | 3094258118.19 | ▇▁▁▁▁ |
| close_drawdown | 0 | 1 | -0.57 | 0.15 | -0.83 | -0.66 | -0.57 | -0.48 | 0.00 | ▃▇▅▁▁ |
| vix | 0 | 1 | 20.23 | 10.02 | 9.15 | 13.43 | 16.57 | 24.32 | 82.69 | ▇▂▁▁▁ |
| effr | 0 | 1 | 1.49 | 0.86 | 0.04 | 1.09 | 1.70 | 2.20 | 2.45 | ▆▁▂▇▇ |
| sp500 | 0 | 1 | 2937.29 | 263.55 | 2237.40 | 2747.33 | 2884.43 | 3110.29 | 3638.35 | ▁▆▇▃▂ |
| gold_price | 0 | 1 | 1467.09 | 231.05 | 1174.16 | 1289.31 | 1353.46 | 1613.79 | 2063.19 | ▇▃▂▂▂ |
| ofr_fsi | 0 | 1 | -1.70 | 2.31 | -4.24 | -2.89 | -2.25 | -1.43 | 10.27 | ▇▂▁▁▁ |
| aaii_bearish_sentiment | 0 | 1 | 0.34 | 0.09 | 0.16 | 0.26 | 0.32 | 0.41 | 0.53 | ▂▇▅▅▃ |
| cny_usd_close | 0 | 1 | 6.82 | 0.24 | 6.27 | 6.71 | 6.88 | 7.00 | 7.18 | ▃▁▅▇▆ |
| dollar_index_close | 0 | 1 | 95.61 | 2.82 | 88.50 | 93.80 | 96.37 | 97.46 | 103.61 | ▂▃▇▅▁ |
| crypto_index | 0 | 1 | 63.40 | 12.62 | 39.20 | 55.36 | 62.08 | 70.88 | 115.30 | ▃▇▃▁▁ |
| crypto_index_return | 0 | 1 | 0.00 | 0.02 | -0.22 | -0.01 | 0.00 | 0.01 | 0.08 | ▁▁▁▇▁ |
| bchain.atrct | 0 | 1 | 9.48 | 3.21 | 3.37 | 7.22 | 8.92 | 11.10 | 25.25 | ▆▇▂▁▁ |